# Peptidoform level analysis #
source('D:/Pipeline comparisons/Writing/R Functions/Binomial Scores Function Peptidoform level.R')
source('D:/Pipeline comparisons/Writing/R Functions/FLR function Bin Adjusted.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site.R')
# We want to assess if there are any differences between the Max and MM collapsing methods using all rice data sets #
# Console message from the conflicted package: "Will prefer dplyr::filter over any other package".
suppressPackageStartupMessages(library("tidyverse"))
library(dplyr)
library(stringr)
library(useful)
library(MASS)
library(reshape2)
library(epiDisplay)
# First we calculate all binomial adjusted data #
#################################################
# Load the eight rice pASTY exports, one CSV per accession, and stamp every
# row of each table with the accession it came from (column `dataset`).
# The element names below become the variable names created in this scope.
invisible(list2env(
  lapply(
    c(
      PXD000923A = "PXD000923",
      PXD002222A = "PXD002222",
      PXD002756A = "PXD002756",
      PXD004705A = "PXD004705",
      PXD004939A = "PXD004939",
      PXD005241A = "PXD005241",
      PXD012764A = "PXD012764",
      PXD019291A = "PXD019291"
    ),
    function(accession) {
      pasty <- read.csv(
        file = paste0("D:/Pipeline comparisons/Writing/Data/TPP/", accession, "_pASTY.csv")
      )
      pasty$dataset <- accession
      pasty
    }
  ),
  envir = environment()
))
# Binomial adjustment step. plyr is attached only for the duration of the
# binAdjustPform() calls.
# NOTE(review): binAdjustPform() is presumably defined in one of the helper
# files sourced at the top of the script and presumably relies on unqualified
# plyr functions -- confirm there.
library(plyr)
PXD000923A_pform <- binAdjustPform(PXD000923A)
PXD002222A_pform <- binAdjustPform(PXD002222A)
PXD002756A_pform <- binAdjustPform(PXD002756A)
PXD004705A_pform <- binAdjustPform(PXD004705A)
PXD004939A_pform <- binAdjustPform(PXD004939A)
PXD005241A_pform <- binAdjustPform(PXD005241A)
PXD012764A_pform <- binAdjustPform(PXD012764A)
PXD019291A_pform <- binAdjustPform(PXD019291A)
# Detach plyr before the FLR step so that it is no longer on the search path
# ahead of the packages attached earlier (dplyr among them).
detach(package:plyr)
# Each *_pform table is overwritten with the result of FLR_AdjTPP().
# NOTE(review): the later threshold filters read a FLR_Adj_Score column, which
# is presumably added here -- confirm in the FLR helper file.
PXD000923A_pform <- FLR_AdjTPP(PXD000923A_pform)
PXD002222A_pform <- FLR_AdjTPP(PXD002222A_pform)
PXD002756A_pform <- FLR_AdjTPP(PXD002756A_pform)
PXD004705A_pform <- FLR_AdjTPP(PXD004705A_pform)
PXD004939A_pform <- FLR_AdjTPP(PXD004939A_pform)
PXD005241A_pform <- FLR_AdjTPP(PXD005241A_pform)
PXD012764A_pform <- FLR_AdjTPP(PXD012764A_pform)
PXD019291A_pform <- FLR_AdjTPP(PXD019291A_pform)
# Build the trimmed "*_pformc" copy of every peptidoform table: identical to
# the "*_pform" table except that the seven positional/PTM helper columns
# listed below are left out (columns that are absent are simply ignored).
invisible(list2env(
  lapply(
    list(
      PXD000923A_pformc = PXD000923A_pform,
      PXD002222A_pformc = PXD002222A_pform,
      PXD002756A_pformc = PXD002756A_pform,
      PXD004705A_pformc = PXD004705A_pform,
      PXD004939A_pformc = PXD004939A_pform,
      PXD005241A_pformc = PXD005241A_pform,
      PXD012764A_pformc = PXD012764A_pform,
      PXD019291A_pformc = PXD019291A_pform
    ),
    function(pform) {
      kept_cols <- setdiff(
        names(pform),
        c("PROTEIN_POS", "PRO_pos_list", "PTM_length", "PTM_beg2", "PTM_end2", "PTM_End", "PTM_Beginning")
      )
      pform[kept_cols]
    }
  ),
  envir = environment()
))
# Stack the eight trimmed pASTY tables into one data frame, drop the rows
# whose Amino value is "A", and tabulate the remaining rows per dataset.
AllRice_pASTY_pform <- dplyr::bind_rows(list(
  PXD000923A_pformc,
  PXD002222A_pformc,
  PXD002756A_pformc,
  PXD004705A_pformc,
  PXD004939A_pformc,
  PXD005241A_pformc,
  PXD012764A_pformc,
  PXD019291A_pformc
))
AllRice_pASTY_pform_Excluding_A <- AllRice_pASTY_pform[!(AllRice_pASTY_pform$Amino == "A"), ]
tab1(AllRice_pASTY_pform_Excluding_A$dataset)
# Stack the pSTY peptidoform tables and tabulate rows per dataset.
# NOTE(review): the PXD*_pform objects used here (no "A" suffix) are not
# created anywhere in the visible part of this script; they presumably come
# from an earlier session or another script -- confirm, otherwise this fails
# with "object not found".
AllRice_pSTY_pform <- dplyr::bind_rows(list(
  PXD000923_pform,
  PXD002222_pform,
  PXD002756_pform,
  PXD004705_pform,
  PXD004939_pform,
  PXD005241_pform,
  PXD012764_pform,
  PXD019291_pform
))
tab1(AllRice_pSTY_pform$dataset)
# Per-dataset subsets at the 1%, 2.5% and 5% FLR cut-offs.
#
# rows_through_flr() keeps rows 1..k of `pform`, where k is the position of
# the LAST row whose FLR_Adj_Score is <= `threshold`; rows before k are kept
# even when their own score is above the threshold.
# NOTE(review): this presumably relies on the table already being ordered from
# best to worst score -- confirm in FLR_AdjTPP().
#
# The previous form, pform[1:max(which(...)), ], stopped with an error when no
# row passed the threshold (max() of an empty vector is -Inf, and 1:-Inf is
# not a valid sequence). seq_len(0) yields an empty data frame instead.
#
# pform:     data frame with a numeric FLR_Adj_Score column.
# threshold: FLR cut-off (e.g. 0.01 for 1%).
# Returns the leading rows of `pform` (possibly zero rows).
rows_through_flr <- function(pform, threshold) {
  passing <- which(pform$FLR_Adj_Score <= threshold)
  last_passing <- if (length(passing) > 0) max(passing) else 0L
  pform[seq_len(last_passing), ]
}

# 1% FLR
PXD000923A_pform_01 <- rows_through_flr(PXD000923A_pformc, 0.01)
PXD002222A_pform_01 <- rows_through_flr(PXD002222A_pformc, 0.01)
PXD002756A_pform_01 <- rows_through_flr(PXD002756A_pformc, 0.01)
PXD004705A_pform_01 <- rows_through_flr(PXD004705A_pformc, 0.01)
PXD004939A_pform_01 <- rows_through_flr(PXD004939A_pformc, 0.01)
PXD005241A_pform_01 <- rows_through_flr(PXD005241A_pformc, 0.01)
PXD012764A_pform_01 <- rows_through_flr(PXD012764A_pformc, 0.01)
PXD019291A_pform_01 <- rows_through_flr(PXD019291A_pformc, 0.01)

# 2.5% FLR
PXD000923A_pform_02.5 <- rows_through_flr(PXD000923A_pformc, 0.025)
PXD002222A_pform_02.5 <- rows_through_flr(PXD002222A_pformc, 0.025)
PXD002756A_pform_02.5 <- rows_through_flr(PXD002756A_pformc, 0.025)
PXD004705A_pform_02.5 <- rows_through_flr(PXD004705A_pformc, 0.025)
PXD004939A_pform_02.5 <- rows_through_flr(PXD004939A_pformc, 0.025)
PXD005241A_pform_02.5 <- rows_through_flr(PXD005241A_pformc, 0.025)
PXD012764A_pform_02.5 <- rows_through_flr(PXD012764A_pformc, 0.025)
PXD019291A_pform_02.5 <- rows_through_flr(PXD019291A_pformc, 0.025)

# 5% FLR
PXD000923A_pform_05 <- rows_through_flr(PXD000923A_pformc, 0.05)
PXD002222A_pform_05 <- rows_through_flr(PXD002222A_pformc, 0.05)
PXD002756A_pform_05 <- rows_through_flr(PXD002756A_pformc, 0.05)
PXD004705A_pform_05 <- rows_through_flr(PXD004705A_pformc, 0.05)
PXD004939A_pform_05 <- rows_through_flr(PXD004939A_pformc, 0.05)
PXD005241A_pform_05 <- rows_through_flr(PXD005241A_pformc, 0.05)
PXD012764A_pform_05 <- rows_through_flr(PXD012764A_pformc, 0.05)
PXD019291A_pform_05 <- rows_through_flr(PXD019291A_pformc, 0.05)
# Pool the per-dataset subsets for each FLR cut-off (1%, 2.5%, 5%).
AllRice_pASTY_pform_01 <- dplyr::bind_rows(list(
  PXD000923A_pform_01, PXD002222A_pform_01, PXD002756A_pform_01, PXD004705A_pform_01,
  PXD004939A_pform_01, PXD005241A_pform_01, PXD012764A_pform_01, PXD019291A_pform_01
))
AllRice_pASTY_pform_02.5 <- dplyr::bind_rows(list(
  PXD000923A_pform_02.5, PXD002222A_pform_02.5, PXD002756A_pform_02.5, PXD004705A_pform_02.5,
  PXD004939A_pform_02.5, PXD005241A_pform_02.5, PXD012764A_pform_02.5, PXD019291A_pform_02.5
))
AllRice_pASTY_pform_05 <- dplyr::bind_rows(list(
  PXD000923A_pform_05, PXD002222A_pform_05, PXD002756A_pform_05, PXD004705A_pform_05,
  PXD004939A_pform_05, PXD005241A_pform_05, PXD012764A_pform_05, PXD019291A_pform_05
))

# For each cut-off, discard the rows whose Amino value is "A" ...
AllRice_pASTY_pform_01_Excluding_A <-
  AllRice_pASTY_pform_01[!(AllRice_pASTY_pform_01$Amino == "A"), ]
AllRice_pASTY_pform_02.5_Excluding_A <-
  AllRice_pASTY_pform_02.5[!(AllRice_pASTY_pform_02.5$Amino == "A"), ]
AllRice_pASTY_pform_05_Excluding_A <-
  AllRice_pASTY_pform_05[!(AllRice_pASTY_pform_05$Amino == "A"), ]

# ... and tabulate the remaining rows per dataset (1%, then 2.5%, then 5%).
tab1(AllRice_pASTY_pform_01_Excluding_A$dataset)
tab1(AllRice_pASTY_pform_02.5_Excluding_A$dataset)
tab1(AllRice_pASTY_pform_05_Excluding_A$dataset)
# Figure 8
library(plyr)
library(dplyr)
library(stringr)
library(useful)
library("data.table")
library("conflicted")
library(reshape2)
conflict_prefer("mutate", "dplyr")
suppressPackageStartupMessages(library("tidyverse"))
conflict_prefer("filter", "dplyr")
library(ggplot2)
library(dplyr)
library(epiDisplay)
library(gmodels)
source('D:/Pipeline comparisons/Writing/R Functions/GBS_Function.R')
# Peptidoform level using maximum #
# Read the saved per-dataset peptidoform tables ("<accession>A_pform.csv")
# back in; each one is stored under the bare accession name.
invisible(list2env(
  lapply(
    c(
      PXD000923 = "PXD000923",
      PXD002222 = "PXD002222",
      PXD002756 = "PXD002756",
      PXD004705 = "PXD004705",
      PXD004939 = "PXD004939",
      PXD005241 = "PXD005241",
      PXD012764 = "PXD012764",
      PXD019291 = "PXD019291"
    ),
    function(accession) {
      read.csv(
        file = paste0("D:/Pipeline comparisons/Writing/Data/TPP/pform/", accession, "A_pform.csv")
      )
    }
  ),
  envir = environment()
))
# Per-dataset subsets at the 1% and 5% FLR cut-offs.
#
# rows_through_flr() keeps rows 1..k of `pform`, where k is the position of
# the LAST row whose FLR_Adj_Score is <= `threshold`; rows before k are kept
# even when their own score is above the threshold.
# NOTE(review): this presumably relies on the table already being ordered from
# best to worst score -- confirm where the CSVs are produced.
#
# The previous form, pform[1:max(which(...)), ], stopped with an error when no
# row passed the threshold (max() of an empty vector is -Inf, and 1:-Inf is
# not a valid sequence). seq_len(0) yields an empty data frame instead.
#
# pform:     data frame with a numeric FLR_Adj_Score column.
# threshold: FLR cut-off (e.g. 0.01 for 1%).
# Returns the leading rows of `pform` (possibly zero rows).
rows_through_flr <- function(pform, threshold) {
  passing <- which(pform$FLR_Adj_Score <= threshold)
  last_passing <- if (length(passing) > 0) max(passing) else 0L
  pform[seq_len(last_passing), ]
}

# 1% FLR
PXD000923_01 <- rows_through_flr(PXD000923, 0.01)
PXD002222_01 <- rows_through_flr(PXD002222, 0.01)
PXD002756_01 <- rows_through_flr(PXD002756, 0.01)
PXD004705_01 <- rows_through_flr(PXD004705, 0.01)
PXD004939_01 <- rows_through_flr(PXD004939, 0.01)
PXD005241_01 <- rows_through_flr(PXD005241, 0.01)
PXD012764_01 <- rows_through_flr(PXD012764, 0.01)
PXD019291_01 <- rows_through_flr(PXD019291, 0.01)

# 5% FLR
PXD000923_05 <- rows_through_flr(PXD000923, 0.05)
PXD002222_05 <- rows_through_flr(PXD002222, 0.05)
PXD002756_05 <- rows_through_flr(PXD002756, 0.05)
PXD004705_05 <- rows_through_flr(PXD004705, 0.05)
PXD004939_05 <- rows_through_flr(PXD004939, 0.05)
PXD005241_05 <- rows_through_flr(PXD005241, 0.05)
PXD012764_05 <- rows_through_flr(PXD012764, 0.05)
PXD019291_05 <- rows_through_flr(PXD019291, 0.05)
# Pool all datasets: the full tables, the 1% FLR subsets and the 5% FLR subsets.
AllRice_pform_Max <- dplyr::bind_rows(list(
  PXD000923, PXD002222, PXD002756, PXD004705,
  PXD004939, PXD005241, PXD012764, PXD019291
))
AllRice_pform_Max_01 <- dplyr::bind_rows(list(
  PXD000923_01, PXD002222_01, PXD002756_01, PXD004705_01,
  PXD004939_01, PXD005241_01, PXD012764_01, PXD019291_01
))
AllRice_pform_Max_05 <- dplyr::bind_rows(list(
  PXD000923_05, PXD002222_05, PXD002756_05, PXD004705_05,
  PXD004939_05, PXD005241_05, PXD012764_05, PXD019291_05
))

# Duplicate FLR_Adj_Score under the name New_FLR_PEP in each pooled table.
# NOTE(review): presumably the column name GSB_Function() expects -- confirm.
AllRice_pform_Max[["New_FLR_PEP"]] <- AllRice_pform_Max[["FLR_Adj_Score"]]
AllRice_pform_Max_01[["New_FLR_PEP"]] <- AllRice_pform_Max_01[["FLR_Adj_Score"]]
AllRice_pform_Max_05[["New_FLR_PEP"]] <- AllRice_pform_Max_05[["FLR_Adj_Score"]]
# Combine the pooled 1% and 5% FLR tables with GSB_Function() and inspect the
# structure of the result.
# NOTE(review): GSB_Function() presumably comes from the sourced
# GBS_Function.R and adds the `cat` column used below -- confirm.
AllRice_pform_Max_Final <- GSB_Function(AllRice_pform_Max_01, AllRice_pform_Max_05)
str(AllRice_pform_Max_Final)

# Remove the positional/PTM helper columns; assigning NULL to a column that
# is not present leaves the table unchanged.
for (helper_col in c("PROTEIN_POS", "PRO_pos_list", "PTM_length", "PTM_beg2",
                     "PTM_end2", "PTM_End", "PTM_Beginning")) {
  AllRice_pform_Max_Final[[helper_col]] <- NULL
}
rm(helper_col)

# Rows with Amino "Y" whose category is anything other than "Bronze".
RiceY <- AllRice_pform_Max_Final[
  (AllRice_pform_Max_Final$cat != "Bronze") & (AllRice_pform_Max_Final$Amino == "Y"),
]

# Category-by-amino-acid contingency table.
CrossTable(AllRice_pform_Max_Final$cat, AllRice_pform_Max_Final$Amino)
# Peptidoform level analysis #
source('D:/Pipeline comparisons/Writing/R Functions/Binomial Scores Function Peptidoform level.R')
source('D:/Pipeline comparisons/Writing/R Functions/FLR function Bin Adjusted.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site.R')
# We want to assess if there are any differences between the Max and MM collapsing methods using all rice data sets #
# Console message from the conflicted package: "Will prefer dplyr::filter over any other package".
suppressPackageStartupMessages(library("tidyverse"))
library(dplyr)
library(stringr)
library(useful)
library(MASS)
library(reshape2)
library(epiDisplay)
# First we calculate all binomial adjusted data #
#################################################
# Load the eight rice pASTY exports, one CSV per accession, and stamp every
# row of each table with the accession it came from (column `dataset`).
# The element names below become the variable names created in this scope.
invisible(list2env(
  lapply(
    c(
      PXD000923A = "PXD000923",
      PXD002222A = "PXD002222",
      PXD002756A = "PXD002756",
      PXD004705A = "PXD004705",
      PXD004939A = "PXD004939",
      PXD005241A = "PXD005241",
      PXD012764A = "PXD012764",
      PXD019291A = "PXD019291"
    ),
    function(accession) {
      pasty <- read.csv(
        file = paste0("D:/Pipeline comparisons/Writing/Data/TPP/", accession, "_pASTY.csv")
      )
      pasty$dataset <- accession
      pasty
    }
  ),
  envir = environment()
))
# pASTY peptidoform level #
##########################
# Binomial adjustment step. plyr is attached only for the duration of the
# binAdjustPform() calls.
# NOTE(review): binAdjustPform() is presumably defined in one of the helper
# files sourced at the top of the script and presumably relies on unqualified
# plyr functions -- confirm there.
library(plyr)
PXD000923A_pform <- binAdjustPform(PXD000923A)
PXD002222A_pform <- binAdjustPform(PXD002222A)
PXD002756A_pform <- binAdjustPform(PXD002756A)
PXD004705A_pform <- binAdjustPform(PXD004705A)
PXD004939A_pform <- binAdjustPform(PXD004939A)
PXD005241A_pform <- binAdjustPform(PXD005241A)
PXD012764A_pform <- binAdjustPform(PXD012764A)
PXD019291A_pform <- binAdjustPform(PXD019291A)
# Detach plyr before the FLR step so that it is no longer on the search path
# ahead of the packages attached earlier (dplyr among them).
detach(package:plyr)
# Each *_pform table is overwritten with the result of FLR_AdjTPP().
# NOTE(review): the later threshold filters read a FLR_Adj_Score column, which
# is presumably added here -- confirm in the FLR helper file.
PXD000923A_pform <- FLR_AdjTPP(PXD000923A_pform)
PXD002222A_pform <- FLR_AdjTPP(PXD002222A_pform)
PXD002756A_pform <- FLR_AdjTPP(PXD002756A_pform)
PXD004705A_pform <- FLR_AdjTPP(PXD004705A_pform)
PXD004939A_pform <- FLR_AdjTPP(PXD004939A_pform)
PXD005241A_pform <- FLR_AdjTPP(PXD005241A_pform)
PXD012764A_pform <- FLR_AdjTPP(PXD012764A_pform)
PXD019291A_pform <- FLR_AdjTPP(PXD019291A_pform)
# Build the trimmed "*_pformc" copy of every peptidoform table: identical to
# the "*_pform" table except that the seven positional/PTM helper columns
# listed below are left out (columns that are absent are simply ignored).
invisible(list2env(
  lapply(
    list(
      PXD000923A_pformc = PXD000923A_pform,
      PXD002222A_pformc = PXD002222A_pform,
      PXD002756A_pformc = PXD002756A_pform,
      PXD004705A_pformc = PXD004705A_pform,
      PXD004939A_pformc = PXD004939A_pform,
      PXD005241A_pformc = PXD005241A_pform,
      PXD012764A_pformc = PXD012764A_pform,
      PXD019291A_pformc = PXD019291A_pform
    ),
    function(pform) {
      kept_cols <- setdiff(
        names(pform),
        c("PROTEIN_POS", "PRO_pos_list", "PTM_length", "PTM_beg2", "PTM_end2", "PTM_End", "PTM_Beginning")
      )
      pform[kept_cols]
    }
  ),
  envir = environment()
))
# Save every trimmed peptidoform table as "<accession>A_pform.csv" in the
# pform folder. write.csv() is called with its defaults, so row names are
# written as the first column of each file.
invisible(Map(
  function(pformc, file_stem) {
    write.csv(
      pformc,
      paste0("D:/Pipeline comparisons/Writing/Data/TPP/pform/", file_stem, "_pform.csv")
    )
  },
  list(
    PXD000923A_pformc, PXD002222A_pformc, PXD002756A_pformc, PXD004705A_pformc,
    PXD004939A_pformc, PXD005241A_pformc, PXD012764A_pformc, PXD019291A_pformc
  ),
  c(
    "PXD000923A", "PXD002222A", "PXD002756A", "PXD004705A",
    "PXD004939A", "PXD005241A", "PXD012764A", "PXD019291A"
  )
))
# Stack the eight trimmed pASTY tables into one data frame, drop the rows
# whose Amino value is "A", and tabulate the remaining rows per dataset.
AllRice_pASTY_pform <- dplyr::bind_rows(list(
  PXD000923A_pformc,
  PXD002222A_pformc,
  PXD002756A_pformc,
  PXD004705A_pformc,
  PXD004939A_pformc,
  PXD005241A_pformc,
  PXD012764A_pformc,
  PXD019291A_pformc
))
AllRice_pASTY_pform_Excluding_A <- AllRice_pASTY_pform[!(AllRice_pASTY_pform$Amino == "A"), ]
tab1(AllRice_pASTY_pform_Excluding_A$dataset)
# Per-dataset subsets at the 1%, 2.5% and 5% FLR cut-offs.
#
# rows_through_flr() keeps rows 1..k of `pform`, where k is the position of
# the LAST row whose FLR_Adj_Score is <= `threshold`; rows before k are kept
# even when their own score is above the threshold.
# NOTE(review): this presumably relies on the table already being ordered from
# best to worst score -- confirm in FLR_AdjTPP().
#
# The previous form, pform[1:max(which(...)), ], stopped with an error when no
# row passed the threshold (max() of an empty vector is -Inf, and 1:-Inf is
# not a valid sequence). seq_len(0) yields an empty data frame instead.
#
# pform:     data frame with a numeric FLR_Adj_Score column.
# threshold: FLR cut-off (e.g. 0.01 for 1%).
# Returns the leading rows of `pform` (possibly zero rows).
rows_through_flr <- function(pform, threshold) {
  passing <- which(pform$FLR_Adj_Score <= threshold)
  last_passing <- if (length(passing) > 0) max(passing) else 0L
  pform[seq_len(last_passing), ]
}

# 1% FLR
PXD000923A_pform_01 <- rows_through_flr(PXD000923A_pformc, 0.01)
PXD002222A_pform_01 <- rows_through_flr(PXD002222A_pformc, 0.01)
PXD002756A_pform_01 <- rows_through_flr(PXD002756A_pformc, 0.01)
PXD004705A_pform_01 <- rows_through_flr(PXD004705A_pformc, 0.01)
PXD004939A_pform_01 <- rows_through_flr(PXD004939A_pformc, 0.01)
PXD005241A_pform_01 <- rows_through_flr(PXD005241A_pformc, 0.01)
PXD012764A_pform_01 <- rows_through_flr(PXD012764A_pformc, 0.01)
PXD019291A_pform_01 <- rows_through_flr(PXD019291A_pformc, 0.01)

# 2.5% FLR
PXD000923A_pform_02.5 <- rows_through_flr(PXD000923A_pformc, 0.025)
PXD002222A_pform_02.5 <- rows_through_flr(PXD002222A_pformc, 0.025)
PXD002756A_pform_02.5 <- rows_through_flr(PXD002756A_pformc, 0.025)
PXD004705A_pform_02.5 <- rows_through_flr(PXD004705A_pformc, 0.025)
PXD004939A_pform_02.5 <- rows_through_flr(PXD004939A_pformc, 0.025)
PXD005241A_pform_02.5 <- rows_through_flr(PXD005241A_pformc, 0.025)
PXD012764A_pform_02.5 <- rows_through_flr(PXD012764A_pformc, 0.025)
PXD019291A_pform_02.5 <- rows_through_flr(PXD019291A_pformc, 0.025)

# 5% FLR
PXD000923A_pform_05 <- rows_through_flr(PXD000923A_pformc, 0.05)
PXD002222A_pform_05 <- rows_through_flr(PXD002222A_pformc, 0.05)
PXD002756A_pform_05 <- rows_through_flr(PXD002756A_pformc, 0.05)
PXD004705A_pform_05 <- rows_through_flr(PXD004705A_pformc, 0.05)
PXD004939A_pform_05 <- rows_through_flr(PXD004939A_pformc, 0.05)
PXD005241A_pform_05 <- rows_through_flr(PXD005241A_pformc, 0.05)
PXD012764A_pform_05 <- rows_through_flr(PXD012764A_pformc, 0.05)
PXD019291A_pform_05 <- rows_through_flr(PXD019291A_pformc, 0.05)
# Pool the per-dataset subsets for each FLR cut-off (1%, 2.5%, 5%).
AllRice_pASTY_pform_01 <- dplyr::bind_rows(list(
  PXD000923A_pform_01, PXD002222A_pform_01, PXD002756A_pform_01, PXD004705A_pform_01,
  PXD004939A_pform_01, PXD005241A_pform_01, PXD012764A_pform_01, PXD019291A_pform_01
))
AllRice_pASTY_pform_02.5 <- dplyr::bind_rows(list(
  PXD000923A_pform_02.5, PXD002222A_pform_02.5, PXD002756A_pform_02.5, PXD004705A_pform_02.5,
  PXD004939A_pform_02.5, PXD005241A_pform_02.5, PXD012764A_pform_02.5, PXD019291A_pform_02.5
))
AllRice_pASTY_pform_05 <- dplyr::bind_rows(list(
  PXD000923A_pform_05, PXD002222A_pform_05, PXD002756A_pform_05, PXD004705A_pform_05,
  PXD004939A_pform_05, PXD005241A_pform_05, PXD012764A_pform_05, PXD019291A_pform_05
))

# For each cut-off, discard the rows whose Amino value is "A" ...
AllRice_pASTY_pform_01_Excluding_A <-
  AllRice_pASTY_pform_01[!(AllRice_pASTY_pform_01$Amino == "A"), ]
AllRice_pASTY_pform_02.5_Excluding_A <-
  AllRice_pASTY_pform_02.5[!(AllRice_pASTY_pform_02.5$Amino == "A"), ]
AllRice_pASTY_pform_05_Excluding_A <-
  AllRice_pASTY_pform_05[!(AllRice_pASTY_pform_05$Amino == "A"), ]

# ... and tabulate the remaining rows per dataset (1%, then 2.5%, then 5%).
tab1(AllRice_pASTY_pform_01_Excluding_A$dataset)
tab1(AllRice_pASTY_pform_02.5_Excluding_A$dataset)
tab1(AllRice_pASTY_pform_05_Excluding_A$dataset)
# Figure 8
library(plyr)
library(dplyr)
library(stringr)
library(useful)
library("data.table")
library("conflicted")
library(reshape2)
conflict_prefer("mutate", "dplyr")
suppressPackageStartupMessages(library("tidyverse"))
conflict_prefer("filter", "dplyr")
library(ggplot2)
library(dplyr)
library(epiDisplay)
library(gmodels)
source('D:/Pipeline comparisons/Writing/R Functions/GBS_Function.R')
# Peptidoform level using maximum #
# Read the saved per-dataset peptidoform tables ("<accession>A_pform.csv")
# back in; each one is stored under the bare accession name.
invisible(list2env(
  lapply(
    c(
      PXD000923 = "PXD000923",
      PXD002222 = "PXD002222",
      PXD002756 = "PXD002756",
      PXD004705 = "PXD004705",
      PXD004939 = "PXD004939",
      PXD005241 = "PXD005241",
      PXD012764 = "PXD012764",
      PXD019291 = "PXD019291"
    ),
    function(accession) {
      read.csv(
        file = paste0("D:/Pipeline comparisons/Writing/Data/TPP/pform/", accession, "A_pform.csv")
      )
    }
  ),
  envir = environment()
))
# Per-dataset subsets at the 1% and 5% FLR cut-offs.
#
# rows_through_flr() keeps rows 1..k of `pform`, where k is the position of
# the LAST row whose FLR_Adj_Score is <= `threshold`; rows before k are kept
# even when their own score is above the threshold.
# NOTE(review): this presumably relies on the table already being ordered from
# best to worst score -- confirm where the CSVs are produced.
#
# The previous form, pform[1:max(which(...)), ], stopped with an error when no
# row passed the threshold (max() of an empty vector is -Inf, and 1:-Inf is
# not a valid sequence). seq_len(0) yields an empty data frame instead.
#
# pform:     data frame with a numeric FLR_Adj_Score column.
# threshold: FLR cut-off (e.g. 0.01 for 1%).
# Returns the leading rows of `pform` (possibly zero rows).
rows_through_flr <- function(pform, threshold) {
  passing <- which(pform$FLR_Adj_Score <= threshold)
  last_passing <- if (length(passing) > 0) max(passing) else 0L
  pform[seq_len(last_passing), ]
}

# 1% FLR
PXD000923_01 <- rows_through_flr(PXD000923, 0.01)
PXD002222_01 <- rows_through_flr(PXD002222, 0.01)
PXD002756_01 <- rows_through_flr(PXD002756, 0.01)
PXD004705_01 <- rows_through_flr(PXD004705, 0.01)
PXD004939_01 <- rows_through_flr(PXD004939, 0.01)
PXD005241_01 <- rows_through_flr(PXD005241, 0.01)
PXD012764_01 <- rows_through_flr(PXD012764, 0.01)
PXD019291_01 <- rows_through_flr(PXD019291, 0.01)

# 5% FLR
PXD000923_05 <- rows_through_flr(PXD000923, 0.05)
PXD002222_05 <- rows_through_flr(PXD002222, 0.05)
PXD002756_05 <- rows_through_flr(PXD002756, 0.05)
PXD004705_05 <- rows_through_flr(PXD004705, 0.05)
PXD004939_05 <- rows_through_flr(PXD004939, 0.05)
PXD005241_05 <- rows_through_flr(PXD005241, 0.05)
PXD012764_05 <- rows_through_flr(PXD012764, 0.05)
PXD019291_05 <- rows_through_flr(PXD019291, 0.05)
# Pool all datasets: the full tables, the 1% FLR subsets and the 5% FLR subsets.
AllRice_pform_Max <- dplyr::bind_rows(list(
  PXD000923, PXD002222, PXD002756, PXD004705,
  PXD004939, PXD005241, PXD012764, PXD019291
))
AllRice_pform_Max_01 <- dplyr::bind_rows(list(
  PXD000923_01, PXD002222_01, PXD002756_01, PXD004705_01,
  PXD004939_01, PXD005241_01, PXD012764_01, PXD019291_01
))
AllRice_pform_Max_05 <- dplyr::bind_rows(list(
  PXD000923_05, PXD002222_05, PXD002756_05, PXD004705_05,
  PXD004939_05, PXD005241_05, PXD012764_05, PXD019291_05
))

# Duplicate FLR_Adj_Score under the name New_FLR_PEP in each pooled table.
# NOTE(review): presumably the column name GSB_Function() expects -- confirm.
AllRice_pform_Max[["New_FLR_PEP"]] <- AllRice_pform_Max[["FLR_Adj_Score"]]
AllRice_pform_Max_01[["New_FLR_PEP"]] <- AllRice_pform_Max_01[["FLR_Adj_Score"]]
AllRice_pform_Max_05[["New_FLR_PEP"]] <- AllRice_pform_Max_05[["FLR_Adj_Score"]]
# Combine the pooled 1% and 5% FLR tables with GSB_Function() and inspect the
# structure of the result.
# NOTE(review): GSB_Function() presumably comes from the sourced
# GBS_Function.R and adds the `cat` column used below -- confirm.
AllRice_pform_Max_Final <- GSB_Function(AllRice_pform_Max_01, AllRice_pform_Max_05)
str(AllRice_pform_Max_Final)

# Remove the positional/PTM helper columns; assigning NULL to a column that
# is not present leaves the table unchanged.
for (helper_col in c("PROTEIN_POS", "PRO_pos_list", "PTM_length", "PTM_beg2",
                     "PTM_end2", "PTM_End", "PTM_Beginning")) {
  AllRice_pform_Max_Final[[helper_col]] <- NULL
}
rm(helper_col)

# Rows with Amino "Y" whose category is anything other than "Bronze".
RiceY <- AllRice_pform_Max_Final[
  (AllRice_pform_Max_Final$cat != "Bronze") & (AllRice_pform_Max_Final$Amino == "Y"),
]

# Category-by-amino-acid contingency table.
CrossTable(AllRice_pform_Max_Final$cat, AllRice_pform_Max_Final$Amino)
# Hand-entered counts for the Figure 8 bar chart: one row per amino acid
# (S, T, Y, A) and level (Gold, Silver, Bronze). The number after the colon
# in each Level label equals the sum of Unique_sites over the four amino
# acids at that level.
df2 <- data.frame(
  Amino = rep(c("S", "T", "Y", "A"), times = rep(3, 4)),
  Level = rep(c("Gold:2898", "Silver:4200", "Bronze:7602"), times = 4),
  Unique_sites = c(
    2644, 3730, 6400, # S: Gold, Silver, Bronze
    244, 404, 742,    # T
    7, 19, 78,        # Y
    3, 47, 382        # A
  )
)
head(df2)
# First look: stacked bars of Unique_sites per Level, filled by amino acid,
# with Level left as it is in df2 (no explicit ordering).
ggplot(data = df2, aes(x = Level, y = Unique_sites, fill = Amino)) +
  geom_bar(stat = "identity")

# Final figure: same chart, but with Level turned into a factor so that the
# bars appear in the order Gold, Silver, Bronze, and with the x-axis title
# blanked.
p <- df2 %>%
  dplyr::arrange(Unique_sites) %>%
  dplyr::mutate(
    Level = factor(Level, levels = c("Gold:2898", "Silver:4200", "Bronze:7602"))
  ) %>%
  ggplot(aes(x = Level, y = Unique_sites, fill = Amino)) +
  geom_bar(stat = "identity") +
  xlab("")

# Label every stacked segment with its count and enlarge the plot text.
p +
  geom_text(
    aes(label = Unique_sites),
    position = position_stack(vjust = 0.8),
    size = 5
  ) +
  theme(text = element_text(size = 14))
# Peptidoform level analysis #
source('D:/Pipeline comparisons/Writing/R Functions/Binomial Scores Function Peptidoform level.R')
source('D:/Pipeline comparisons/Writing/R Functions/FLR function Bin Adjusted.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site.R')
# Console message from the conflicted package: "Will prefer dplyr::filter over any other package".
suppressPackageStartupMessages(library("tidyverse"))
library(dplyr)
library(stringr)
library(useful)
library(MASS)
library(reshape2)
library(epiDisplay)
# Re-run of the peptidoform pipeline for the single dataset PXD012764:
# read -> binomial adjustment -> FLR adjustment -> drop helper columns ->
# save -> FLR threshold subsets.
#
# Cleaned up relative to the pasted history: the CSV was read twice, plyr was
# attached twice, and binAdjustPform() was run twice (the first time before
# the local library(plyr) call). Each step now runs once.
# NOTE(review): assumes binAdjustPform() has no side effects that depend on
# being run twice -- confirm in the sourced helper file.
PXD012764A <- read.csv(file = 'D:/Pipeline comparisons/Writing/Data/TPP/PXD012764_pASTY.csv')
PXD012764A$dataset <- "PXD012764"

# plyr is attached only for binAdjustPform() and detached before FLR_AdjTPP(),
# matching the order used for the other datasets.
library(plyr)
PXD012764A_pform <- binAdjustPform(PXD012764A)
detach(package:plyr)
PXD012764A_pform <- FLR_AdjTPP(PXD012764A_pform)

# Trimmed copy without the positional/PTM helper columns, saved to disk.
PXD012764A_pformc <- PXD012764A_pform[setdiff(names(PXD012764A_pform), c("PROTEIN_POS", "PRO_pos_list", "PTM_length", "PTM_beg2", "PTM_end2", "PTM_End", "PTM_Beginning"))]
write.csv(PXD012764A_pformc, "D:/Pipeline comparisons/Writing/Data/TPP/pform/PXD012764A_pform.csv")

# rows_through_flr() keeps rows 1..k of `pform`, where k is the position of
# the LAST row whose FLR_Adj_Score is <= `threshold`. When no row passes it
# returns an empty data frame; the previous pform[1:max(which(...)), ] form
# stopped with an error in that case (max() of an empty vector is -Inf).
#
# pform:     data frame with a numeric FLR_Adj_Score column.
# threshold: FLR cut-off (e.g. 0.01 for 1%).
rows_through_flr <- function(pform, threshold) {
  passing <- which(pform$FLR_Adj_Score <= threshold)
  last_passing <- if (length(passing) > 0) max(passing) else 0L
  pform[seq_len(last_passing), ]
}

PXD012764A_pform_01 <- rows_through_flr(PXD012764A_pformc, 0.01)
PXD012764A_pform_02.5 <- rows_through_flr(PXD012764A_pformc, 0.025)
PXD012764A_pform_05 <- rows_through_flr(PXD012764A_pformc, 0.05)
